#Rcode belonging to Cobacho, S. P., van de Leemput, I. A., Holmgren, M., & Christianen, M. J. (2024). Impact of 
#human disturbance on biogeochemical fluxes in tropical seascapes. Marine Environmental Research, 106479.

##Code developed for calculating data summaries

# Set working directory
# The data folder defaults to the original author's location. To run the script
# on another machine without editing this file, set the environment variable
# BIBLIO_DATA_DIR to the folder that contains the input workbook.
data_dir <- Sys.getenv(
  "BIBLIO_DATA_DIR",
  unset = "C:/Users/pino002/OneDrive - WageningenUR/PhD/Project1-Review/Data"
)
# Fail early with an explicit message instead of setwd()'s generic error.
if (!dir.exists(data_dir)) {
  stop("Data directory not found: ", data_dir, call. = FALSE)
}
setwd(data_dir)

#Packages
library(readxl)
library(janitor)
library(dplyr)
library(writexl)

# Load the bibliographic data: second sheet of the workbook, with the file path
# resolved relative to the current working directory.
dataset <- read_excel(path = "Bibliographic_data.xlsx", sheet = 2)

#Data summaries and calculations

# Papers per publication year ----

# Rename the spreadsheet column "Year published" to the syntactic name "Year".
dataset <- dataset %>%
  rename(Year = "Year published")

# Count papers per year. Only the category column and its count are kept,
# selected by name so the result does not depend on how many extra columns
# tabyl() returns.
dfYears <- tabyl(dataset, Year)
dfYears <- dfYears[c("Year", "n")]

# Drop the row that counts papers with a missing year. Testing for NA (rather
# than deleting a fixed row number) stays correct when the number of distinct
# years in the data changes.
dfYears <- dfYears[!is.na(dfYears$Year), ]

# Total number of papers with a known year, and each year's share in percent.
totalsum_years <- sum(dfYears$n)
dfYears$percentage <- (dfYears$n / totalsum_years) * 100

# Papers per habitat ----

# Count papers per habitat category; keep only the category and its count,
# selected by name rather than by column position.
dfHabitat <- tabyl(dataset, Habitats)
dfHabitat <- dfHabitat[c("Habitats", "n")]

# Drop the row that counts papers with a missing habitat. Testing for NA (rather
# than deleting a fixed row number) stays correct if the set of habitat
# categories changes.
dfHabitat <- dfHabitat[!is.na(dfHabitat$Habitats), ]

# Total number of papers with a known habitat, and each category's share (%).
totalsum_habitat <- sum(dfHabitat$n)
dfHabitat$percentage <- (dfHabitat$n / totalsum_habitat) * 100

# Manually specified x-coordinate for each category's label, given in the row
# order of dfHabitat (presumably used for plotting elsewhere -- confirm).
label_x <- c(13, 29, 17.5, 27.5, 31, 12, 15)
# One coordinate per remaining category is required; fail with a clear message
# if the data no longer has exactly that many categories.
stopifnot(
  "label_x needs one value per habitat category" =
    length(label_x) == nrow(dfHabitat)
)
dfHabitat$label_x <- label_x

# Papers per flux classification ----

# Count papers per classification; keep only the category and its count,
# selected by name rather than by column position.
dfFlux <- tabyl(dataset, Classification)
dfFlux <- dfFlux[c("Classification", "n")]

# Drop the row that counts papers with a missing classification. Testing for NA
# (rather than deleting a fixed row number) stays correct if the set of
# classifications changes.
dfFlux <- dfFlux[!is.na(dfFlux$Classification), ]

# Total number of classified papers, and each classification's share (%).
totalsum_flux <- sum(dfFlux$n)
dfFlux$percentage <- (dfFlux$n / totalsum_flux) * 100

# Papers per region ----

# Count papers per region, then discard the third and fourth columns of the
# tabyl() output so only the category and its count remain.
dfRegion <- tabyl(dataset, Region)
dfRegion <- dfRegion[-(3:4)]

# Discard rows 8 and 9 of the table.
# NOTE(review): the original comment says these are the NA rows, but two rows
# are removed -- confirm what row 8 holds in the current data.
dfRegion <- dfRegion[-(8:9), ]

# Total number of papers in the remaining regions, and each region's share (%).
totalsum_region <- sum(dfRegion$n)
dfRegion$percentage <- dfRegion$n / totalsum_region * 100

# Overwrite the region names of the first seven rows with display labels.
# NOTE(review): labels are assigned purely by row position, so they rely on the
# row order produced by tabyl() for the current data -- verify if data change.
dfRegion[1:7, "Region"] <- c(
  "Caribbean",
  "Australia",
  "South America",
  "Asia",
  "Indo-Pacific",
  "Africa",
  "Middle East"
)

#Export calculated summaries

#Sheets <- list('Sheet1' = dfYears, 'Sheet2' = dfHabitat, 'Sheet3' = dfFlux)
#write_xlsx(
#  Sheets,"C:/Users/pino002/OneDrive - WageningenUR/PhD/Project1-Review/Data\\Summaries.xlsx",
#  col_names = TRUE,
#  format_headers = TRUE)